Aron demonstrates how to read data with pandas and plot it with matplotlib.



In [12]:

    
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline



In [5]:

    
# Location of the input CSV; edit this to point at your local copy.
PATH = 'all_immigrant_probs.csv'
# The file is read without a header row, so columns get integer labels.
data = pd.read_csv(filepath_or_buffer=PATH, header=None)



In [6]:

    
# Preview the first five rows of the loaded table.
data.head(n=5)









    Out[6]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      947
      948
      949
      950
      951
      952
      953
      954
      955
      956
    
  
  
    
      0
      -195.0
      0.080685
      0.083078
      0.105677
      NaN
      0.029739
      0.084379
      0.011826
      0.017002
      0.029653
      ...
      0.129844
      0.019499
      0.111091
      0.072375
      0.119258
      0.317903
      0.066854
      0.118523
      NaN
      0.009324
    
    
      1
      -185.0
      0.024921
      0.024944
      0.075406
      NaN
      0.037173
      0.046541
      0.022118
      NaN
      0.023581
      ...
      0.125411
      0.106249
      0.091641
      0.076613
      0.109993
      0.109533
      0.075414
      0.079401
      NaN
      0.023035
    
    
      2
      -175.0
      0.048208
      0.092144
      0.042366
      0.043871
      0.028122
      0.111163
      0.026134
      0.022043
      0.125895
      ...
      0.131912
      NaN
      0.032582
      0.131267
      0.083344
      0.206429
      0.106499
      0.124147
      NaN
      0.019503
    
    
      3
      -165.0
      0.062095
      0.058378
      0.026325
      0.023334
      0.029739
      0.067194
      0.036022
      0.027748
      0.035788
      ...
      0.117357
      NaN
      0.131863
      0.164130
      0.123599
      0.068147
      0.076054
      0.119952
      NaN
      0.026576
    
    
      4
      -155.0
      0.095869
      NaN
      0.050343
      0.025357
      0.032289
      0.101354
      0.037561
      NaN
      0.025916
      ...
      0.116511
      0.070994
      0.069701
      NaN
      0.117703
      0.033291
      0.084819
      0.147340
      NaN
      0.028604
    
  

5 rows × 957 columns



In [29]:

    
# Plot the first two users (columns 1 and 2) against column 0, which is
# used as the x axis.
ax = data.iloc[:, :3].plot(x=0)
# Without explicit labels pandas titles the x axis with the bare column
# name "0"; use the same labels as the other plots in this notebook.
ax.set_xlabel('time from date of immigration')
ax.set_ylabel('depression probability')
# Keep the Axes as the cell's value so it is still what the cell displays.
ax









    Out[29]:





<matplotlib.axes._subplots.AxesSubplot at 0x115671828>



In [13]:

    
# Draw every user column against column 0 (the x axis); the legend is
# turned off for this many lines.
ax = data.plot(x=0, legend=False)
# Label the axes directly on the Axes object returned by pandas.
ax.set_xlabel('time from date of immigration')
ax.set_ylabel('depression probability')









    Out[13]:





<matplotlib.text.Text at 0x1131d2128>



In [30]:

    
# Plot the first 50 users. Column 0 is the x axis (time), so 50 users
# means columns 0 through 50 inclusive (51 columns); selecting only
# range(50) would cover the time column plus just 49 users.
ax = data.iloc[:, :51].plot(x=0, legend=False)
ax.set_xlabel('time from date of immigration')
ax.set_ylabel('depression probability')









    Out[30]:





<matplotlib.text.Text at 0x1160d75f8>



In [38]:

    
# Plot the overall mean: the average over all user columns at each time point.
# Column 0 (time) becomes the index, so it is excluded from the average and
# used as the x axis. The user columns are taken as "everything except
# column 0" rather than being bounded by len(data), which is the number of
# ROWS and would either drop users or raise IndexError depending on the
# frame's shape.
# NOTE(review): this reads "overall mean" as the across-user mean per time
# point (axis=1). Use axis=0 for a per-user mean over time instead.
mean_by_time = data.set_index(0).mean(axis=1)  # NaN entries are skipped by default
ax = mean_by_time.plot()
ax.set_xlabel('time from date of immigration')
ax.set_ylabel('mean depression probability')
# Keep the Axes as the cell's value so it is still what the cell displays.
ax









    Out[38]:





<matplotlib.axes._subplots.AxesSubplot at 0x1162a0780>

	0	1	2	3	4	5	6	7	8	9	...	947	948	949	950	951	952	953	954	955	956
0	-195.0	0.080685	0.083078	0.105677	NaN	0.029739	0.084379	0.011826	0.017002	0.029653	...	0.129844	0.019499	0.111091	0.072375	0.119258	0.317903	0.066854	0.118523	NaN	0.009324
1	-185.0	0.024921	0.024944	0.075406	NaN	0.037173	0.046541	0.022118	NaN	0.023581	...	0.125411	0.106249	0.091641	0.076613	0.109993	0.109533	0.075414	0.079401	NaN	0.023035
2	-175.0	0.048208	0.092144	0.042366	0.043871	0.028122	0.111163	0.026134	0.022043	0.125895	...	0.131912	NaN	0.032582	0.131267	0.083344	0.206429	0.106499	0.124147	NaN	0.019503
3	-165.0	0.062095	0.058378	0.026325	0.023334	0.029739	0.067194	0.036022	0.027748	0.035788	...	0.117357	NaN	0.131863	0.164130	0.123599	0.068147	0.076054	0.119952	NaN	0.026576
4	-155.0	0.095869	NaN	0.050343	0.025357	0.032289	0.101354	0.037561	NaN	0.025916	...	0.116511	0.070994	0.069701	NaN	0.117703	0.033291	0.084819	0.147340	NaN	0.028604